
# clear environment
# NOTE(review): rm(list = ls()) only empties the global workspace; it does not
# detach packages or reset options. Running in a fresh R session is safer.
rm(list = ls())

# make sure working directory is set to folder within which this script is saved
getwd()

# load data
lucid <- read.csv(
  "Negativity+Bias_Summer+2020_October+27,+2021_16.10.csv",
  stringsAsFactors = FALSE
)
lucid_info <- read.csv(
  "DataAnalysis_8109252_Negativity_Bias_Summer_2020_2_2020-08-11_1317.csv",
  stringsAsFactors = FALSE
)

# remove test cases
# Response IDs flagged as "test" in the info file; which() skips rows whose
# PID is NA so they cannot inject NA into the lookup set.
tests <- tolower(lucid_info$Response.ID[which(lucid_info$PID == "test")])
tests <- tests[!is.na(tests) & nzchar(tests)]

# compare case-insensitively on both sides, then actually drop any matches
is_test <- tolower(lucid$rid) %in% tests
which(is_test) # none in the export this script was written against
lucid <- lucid[!is_test, ]
nrow(lucid) # 1,985


#### Data Checks ####

## Survey Time
# StartDate / EndDate are split on the space into a date piece and a clock
# piece; the clock piece is split on ":" and its first two fields are used as
# hour and minute.
start_time_code <- strsplit(as.character(lucid$StartDate), " ")
end_time_code <- strsplit(as.character(lucid$EndDate), " ")

# k-th piece of every split string (NA when that piece does not exist)
pick_piece <- function(pieces, k) {
  vapply(pieces, function(p) p[k], character(1), USE.NAMES = FALSE)
}

start_date <- pick_piece(start_time_code, 1)
end_date <- pick_piece(end_time_code, 1)

start_hour_min <- strsplit(pick_piece(start_time_code, 2), ":")
end_hour_min <- strsplit(pick_piece(end_time_code, 2), ":")

# clock readings converted to minutes since midnight
start_hour <- as.numeric(pick_piece(start_hour_min, 1)) * 60
start_minute <- as.numeric(pick_piece(start_hour_min, 2))
start_time <- start_hour + start_minute

end_hour <- as.numeric(pick_piece(end_hour_min, 1)) * 60
end_minute <- as.numeric(pick_piece(end_hour_min, 2))
end_time <- end_hour + end_minute

# The clock difference is only meaningful when start and end fall on the same
# date; respondents who finished on a different date get NA.
same_day <- ifelse(start_date == end_date, 1, 0)
which(same_day == 0) # respondents whose start and end dates differ

lucid$total_time <- ifelse(same_day == 1, end_time - start_time, NA)
summary(lucid$total_time) # rerun after subsets

## calculate time spent on BeanFest

# start and end clock readings (hour, minute, second) as seconds since midnight
start_trial <- lucid$bean_start_hour * 60 * 60 +
  lucid$bean_start_minute * 60 +
  lucid$bean_start_second
end_trial <- lucid$bean_end_hour * 60 * 60 +
  lucid$bean_end_minute * 60 +
  lucid$bean_end_second

# elapsed seconds; a negative difference means the task went past midnight,
# so one full day is added back
task_time <- end_trial - start_trial
crossed_midnight <- which(task_time < 0)
task_time[crossed_midnight] <- task_time[crossed_midnight] + 24 * 60 * 60

# store the duration in minutes
lucid$task_min <- task_time / 60
summary(lucid$task_min)

## Validation Checks

# remove responses outside of collection window (pilot testing prior to 7/22)
# NOTE(review): the cut-off is positional -- the first 15 distinct start dates,
# in table()'s sorted order, are treated as pilot days. Re-check the printed
# dates whenever the raw export changes.
date_levels <- names(table(start_date))
date_levels
date_levels[1:15]
date_levels[16:length(date_levels)]
good_dates <- date_levels[16:length(date_levels)]
lucid <- lucid[which(start_date %in% good_dates), ]
names(table(unique(substr(lucid$StartDate, 1, 9))))

# who passed bot check
length(which(
  is.na(lucid$bot_1) & is.na(lucid$bot_2) & is.na(lucid$bot_3) &
    is.na(lucid$bot_4) & lucid$bot_5 == 1 & lucid$bot_6 == 1
))

# who passed consent form
length(which(lucid$Q137 == 4 | lucid$Q137 == 1)) # at one time was coded as 1

# who passed Trump attention check
length(which(lucid$trump == 3))

# subset to those who passed validation checks
# (only the Trump attention check is applied as a filter; rows with a missing
# answer are dropped as well)
lucid <- lucid[which(lucid$trump == 3), ]
nrow(lucid) # n = 1456

# remove respondents without a Lucid ID
# read.csv() leaves empty text cells as "" rather than NA, so blank IDs are
# treated as missing too; otherwise one blank-ID row would survive the
# duplicate filter below.
has_rid <- !is.na(lucid$rid) & nzchar(trimws(as.character(lucid$rid)))
lucid <- lucid[has_rid, ]
sum(is.na(lucid$rid))

# Checking for duplicate RIDs (the first occurrence is kept)
lucid <- lucid[!duplicated(lucid$rid), ]
sum(duplicated(lucid$rid))
length(unique(lucid$rid)) # n = 1454 in the original run; re-check after blank-ID removal

# index variable to track respondents
lucid$index <- seq_len(nrow(lucid))

# updated survey time
summary(lucid$total_time)

#### Coding Negativity Bias ####

# Approach behavior: % of all beans accepted during game phase
approach_bev <- vapply(seq_len(nrow(lucid)), function(i) {
  # comma-separated decision string -> character vector
  choices <- unlist(strsplit(lucid$decision_vec[i], ","))
  choices[choices %in% "null"] <- "1" # ignored trials count as "no"
  choices <- as.numeric(choices)[-1] # numeric, minus the initialization entry

  # respondent did not go through the whole task (108 decisions expected)
  if (length(choices) != 108) {
    return(NA_real_)
  }

  # proportion accepted (2 = accept)
  sum(choices == 2, na.rm = TRUE) / length(choices)
}, numeric(1))

# flip sign so that higher values mean more avoidance, then standardize
lucid$avoid_bev <- 1 - approach_bev
avoid_center <- mean(lucid$avoid_bev, na.rm = TRUE)
avoid_spread <- sd(lucid$avoid_bev, na.rm = TRUE)
lucid$avoid_bev_std <- (lucid$avoid_bev - avoid_center) / avoid_spread

# Overall learning: % of 36 game phase beans correctly identified in test phase (just summary stat)
# Respondents without a complete test phase keep NA in all three vectors.
n_resp <- nrow(lucid)
correct_vec <- rep(NA_real_, n_resp)
correct_pos_vec <- rep(NA_real_, n_resp)
correct_neg_vec <- rep(NA_real_, n_resp)

# Parse a comma-separated task string into numbers and drop the leading
# initialization entry. When `null_as` is given, "null" (an ignored trial) is
# replaced by that code before conversion.
parse_task_vec <- function(x, null_as = NULL) {
  out <- unlist(strsplit(x, ","))
  if (!is.null(null_as)) {
    out[out %in% "null"] <- null_as
  }
  as.numeric(out)[-1]
}

for (i in seq_len(n_resp)) {

  # Decision Vector ("null" -> 4, i.e. ignores count as "no")
  dec_vec <- parse_task_vec(lucid$decision_test_vec[i], null_as = "4")

  if (length(dec_vec) != 56) { # leave NA if R didn't finish the task
    next
  }

  # Mark Vector (0 = bean not marked; marked beans are the ones scored here)
  mark_vec <- parse_task_vec(lucid$bean_mark_test_vec[i])
  if (length(mark_vec) == 0) { # nothing to score; leave NA
    next
  }

  # Value Vector (10 = positive bean, -10 = negative bean)
  val_vec <- parse_task_vec(lucid$bean_value_test_vec[i])

  # Score all marked beans at once. An NA in any comparison counts as
  # "not correct" instead of aborting the script the way a scalar if () would.
  idx <- seq_along(mark_vec)
  marked <- !is.na(mark_vec) & mark_vec != 0
  hit_pos <- marked & val_vec[idx] == 10 & dec_vec[idx] == 5
  hit_neg <- marked & val_vec[idx] == -10 & dec_vec[idx] == 4
  n_pos <- sum(hit_pos, na.rm = TRUE)
  n_neg <- sum(hit_neg, na.rm = TRUE)

  # denominators are fixed: 36 game beans, 18 positive and 18 negative
  correct_vec[i] <- (n_pos + n_neg) / 36
  correct_pos_vec[i] <- n_pos / 18
  correct_neg_vec[i] <- n_neg / 18
}
lucid$learn_per <- correct_vec

# *Learning asymmetry*: the difference in the proportion of correctly classified positive and negative game beans (the 36 seen during game phase) during the test phase (% correct negative - % correct positive)
lucid$neg_bias <- correct_neg_vec - correct_pos_vec
lucid$neg_bias_std <- (lucid$neg_bias - mean(lucid$neg_bias, na.rm = TRUE)) /
  sd(lucid$neg_bias, na.rm = TRUE) # standardize

## Valence weighting bias: for each person sum the number of non-marked beans approached (+1) and avoided (-1) during the test phase.
# Then regress this as the DV on the proportion of positive game beans and the proportion of negative game beans
# they got correct during the test phase. Take the residuals from this model and flip their sign.

# Preallocated so the vector always has one entry per respondent, even when
# the last respondents have nothing to score.
bean_app <- rep(NA_real_, nrow(lucid))
for (i in seq_len(nrow(lucid))) {

  # Decision Vector ("null" -> 4, i.e. ignores count as "no")
  dec_vec <- unlist(strsplit(lucid$decision_test_vec[i], ","))
  dec_vec[dec_vec %in% "null"] <- "4"
  dec_vec <- as.numeric(dec_vec)[-1] # numeric, minus the initialization entry

  if (length(dec_vec) != 56) { # leave NA if R didn't finish the task
    next
  }

  # Mark Vector (1 = marked bean, skipped here)
  mark_vec <- as.numeric(unlist(strsplit(lucid$bean_mark_test_vec[i], ",")))[-1]

  # positions of non-marked beans; which() also drops NA marks
  novel <- which(mark_vec != 1)
  if (length(novel) == 0) { # no non-marked beans: leave NA
    next
  }

  # +1 for every approached bean (decision 5), -1 for every other decision
  approached <- dec_vec[novel] %in% 5
  bean_app[i] <- sum(approached) - sum(!approached)
}

has_resid <- which(!is.na(bean_app) & !is.na(correct_neg_vec) & !is.na(correct_pos_vec))
m_val_wt <- lm(bean_app ~ correct_pos_vec + correct_neg_vec)
summary(m_val_wt) # use coefs as descriptive stats

# lm() silently drops incomplete rows; make sure the rows it kept are exactly
# the ones the residuals are written back to
stopifnot(length(has_resid) == length(resid(m_val_wt)))

lucid$val_wt <- rep(NA_real_, nrow(lucid))
lucid$val_wt[has_resid] <- -resid(m_val_wt)
lucid$val_wt_std <- (lucid$val_wt - mean(lucid$val_wt, na.rm = TRUE)) /
  sd(lucid$val_wt, na.rm = TRUE) # standardize

#### Coding Dependent Variables ####

# All measures are coded so that higher values lie closer to the expected conservative position.

#### Openness ####

## reverse-keyed items: mirror the response (x -> 6 - x)
lucid$ipip1 <- 6 - lucid$ipip1
lucid$ipip5 <- 6 - lucid$ipip5

## per-respondent mean of the answered items (NaN when none were answered)
ipip_items <- cbind(
  lucid$ipip1, lucid$ipip2, lucid$ipip3, lucid$ipip4, lucid$ipip5
)
open_means <- apply(ipip_items, 1, function(resp) mean(resp[!is.na(resp)]))

## rescale: subtract 1, divide by 4
lucid$open <- (open_means - 1) / 4

#### NFC ####

## per-respondent mean of the answered items (NaN when none were answered)
nfc_items <- cbind(
  lucid$nfc1, lucid$nfc2, lucid$nfc3, lucid$nfc4, lucid$nfc5,
  lucid$nfc6, lucid$nfc7, lucid$nfc8, lucid$nfc9, lucid$nfc10
)
nfc_means <- apply(nfc_items, 1, function(resp) mean(resp[!is.na(resp)]))

## rescale: subtract 1, divide by 5
lucid$nfc <- (nfc_means - 1) / 5


#### Conservation ####

## fix 8 to 6 miscoding
# NOTE(review): only schw2-schw5 are recoded; confirm schw1 was not affected.
lucid$schw2[which(lucid$schw2 == 8)] <- 6
lucid$schw3[which(lucid$schw3 == 8)] <- 6
lucid$schw4[which(lucid$schw4 == 8)] <- 6
lucid$schw5[which(lucid$schw5 == 8)] <- 6

## reverse-keyed items: mirror the response (x -> 7 - x)
lucid$schw2 <- 7 - lucid$schw2
lucid$schw3 <- 7 - lucid$schw3
lucid$schw5 <- 7 - lucid$schw5

## per-respondent mean of the answered items (NaN when none were answered)
schw_items <- cbind(
  lucid$schw1, lucid$schw2, lucid$schw3, lucid$schw4, lucid$schw5
)
conserve_means <- apply(schw_items, 1, function(resp) mean(resp[!is.na(resp)]))

## rescale: subtract 1, divide by 5
lucid$conserve <- (conserve_means - 1) / 5

#### Authoritarianism ####

## reverse-keyed items: mirror the response (x -> 3 - x)
lucid$auth3 <- 3 - lucid$auth3
lucid$auth5 <- 3 - lucid$auth5

## per-respondent mean of the answered items (NaN when none were answered)
auth_items <- cbind(
  lucid$auth1, lucid$auth2, lucid$auth3, lucid$auth4, lucid$auth5
)
auth_means <- apply(auth_items, 1, function(resp) mean(resp[!is.na(resp)]))

## shift down by 1 so the scale starts at 0
lucid$auth <- auth_means - 1

#### Moral Traditionalism ####

## reverse-keyed items: mirror the response (x -> 8 - x)
lucid$trad2 <- 8 - lucid$trad2
lucid$trad3 <- 8 - lucid$trad3

## per-respondent mean of the answered items (NaN when none were answered)
trad_items <- cbind(lucid$trad1, lucid$trad2, lucid$trad3, lucid$trad4)
trad_means <- apply(trad_items, 1, function(resp) mean(resp[!is.na(resp)]))

## rescale: subtract 1, divide by 6
lucid$trad <- (trad_means - 1) / 6

#### Limited Gov ####

## reverse-keyed items: mirror the response (x -> 3 - x)
lucid$lim2 <- 3 - lucid$lim2
lucid$lim3 <- 3 - lucid$lim3

## per-respondent mean of the answered items (NaN when none were answered)
lim_items <- cbind(lucid$lim1, lucid$lim2, lucid$lim3)
limgov_means <- apply(lim_items, 1, function(resp) mean(resp[!is.na(resp)]))

## shift down by 1 so the scale starts at 0
lucid$limgov <- limgov_means - 1

#### Social Policy ####

## flip reverse coded items
# gay: 1 stays 1, 2 becomes 3, every other non-missing answer becomes 2;
# missing answers stay missing
gm_recode <- rep(2, length(lucid$gay))
gm_recode[is.na(lucid$gay)] <- NA
gm_recode[which(lucid$gay == 1)] <- 1
gm_recode[which(lucid$gay == 2)] <- 3
lucid$gay <- gm_recode
# abort: mirror the response (x -> 5 - x)
lucid$abort <- 5 - lucid$abort

## place all on same scale (subtract 1, divide by the item's own divisor)
lucid$gay <- (lucid$gay - 1) / 2
lucid$affirm <- (lucid$affirm - 1) / 5
lucid$imm <- (lucid$imm - 1) / 3
lucid$abort <- (lucid$abort - 1) / 3

## per-respondent mean of the answered items (NaN when none were answered)
social_items <- cbind(lucid$gay, lucid$affirm, lucid$imm, lucid$abort)
social_means <- apply(social_items, 1, function(resp) mean(resp[!is.na(resp)]))

lucid$social <- social_means

#### Economic Policy ####

## reverse-keyed item: mirror the response (x -> 7 - x)
# NOTE(review): the flip uses 7 - x, which fits a 1-6 item, but the rescaling
# below divides ssec by 6, which fits a 1-7 item. Confirm the response range
# of ssec; one of the two constants looks off by one.
lucid$ssec <- 7 - lucid$ssec

## place all on same scale (subtract 1, divide by the item's own divisor)
lucid$insure <- (lucid$insure - 1) / 6
lucid$ssec <- (lucid$ssec - 1) / 6
lucid$wage <- (lucid$wage - 1) / 5
lucid$tax <- (lucid$tax - 1) / 3

## per-respondent mean of the answered items (NaN when none were answered)
econ_items <- cbind(lucid$insure, lucid$ssec, lucid$wage, lucid$tax)
economic_means <- apply(econ_items, 1, function(resp) mean(resp[!is.na(resp)]))

lucid$economic <- economic_means

#### Political Identity ####

## recode pid7
# start from the leaner item, overwrite with rstr where it was answered, and
# finally with dstr where it was answered (so dstr takes precedence)
pid7 <- lucid$lean
from_rstr <- !is.na(lucid$rstr)
pid7[from_rstr] <- lucid$rstr[from_rstr]
from_dstr <- !is.na(lucid$dstr)
pid7[from_dstr] <- lucid$dstr[from_dstr]
lucid$pid7 <- pid7
prop.table(table(lucid$pid7[!is.na(lucid$neg_bias_std)])) # 45%, 42, 12

## ideo
prop.table(table(lucid$ideo[!is.na(lucid$neg_bias_std)])) # 36, 32, 31

## per-respondent mean of whichever of pid7 / ideo was answered
pol_items <- cbind(lucid$pid7, lucid$ideo)
pol_id_means <- apply(pol_items, 1, function(resp) mean(resp[!is.na(resp)]))

## rescale: subtract 1, divide by 6
lucid$pol_id <- (pol_id_means - 1) / 6

#### Code Remaining Individual Issues ####

# Military Spending: mirror the response (x -> 7 - x), then subtract 1 and
# divide by 5
lucid$milit <- ((7 - lucid$milit) - 1) / 5

# Imports: subtract 1 and divide by 5
lucid$import <- (lucid$import - 1) / 5

#### Coding Controls ####

# respondents who have a score for negativity bias; every descriptive share
# below is computed within this group
has_bias <- !is.na(lucid$neg_bias_std)

## overall sample size (# of respondents who have scores for negativity bias)
sum(has_bias) # 821

## Age (2020 minus birth year, then min-max scaled)
lucid$age <- 2020 - lucid$born
mean(lucid$age[has_bias], na.rm = TRUE) # 51
age_rng <- range(lucid$age, na.rm = TRUE)
lucid$age <- (lucid$age - age_rng[1]) / (age_rng[2] - age_rng[1])

## Gender (female = 1 when gender is coded 2)
sum(lucid$gender == 3, na.rm = TRUE) # how many select other
lucid$female <- as.numeric(lucid$gender == 2)
prop.table(table(lucid$female[has_bias])) # 51%

## Black (1 when race_2 is present and non-zero, else 0)
lucid$black <- as.numeric(!is.na(lucid$race_2) & lucid$race_2 != 0)
prop.table(table(lucid$black[has_bias])) # 9%

# Hispanic (1 when race_3 is present and non-zero, else 0)
lucid$hisp <- as.numeric(!is.na(lucid$race_3) & lucid$race_3 != 0)
prop.table(table(lucid$hisp[has_bias])) # 4%

## Education (min-max scaled)
prop.table(table(lucid$educ[has_bias])) # 18%, 29%, 33%, 19%
educ_rng <- range(lucid$educ, na.rm = TRUE)
lucid$educ <- (lucid$educ - educ_rng[1]) / (educ_rng[2] - educ_rng[1])

## Income (min-max scaled)
prop.table(table(lucid$income[has_bias])) # 4%, 36%, 32%, 18%, 10%
income_rng <- range(lucid$income, na.rm = TRUE)
lucid$income <- (lucid$income - income_rng[1]) / (income_rng[2] - income_rng[1])

## Unemployed (1 when employ is coded 3)
lucid$unemp <- as.numeric(lucid$employ == 3)
prop.table(table(lucid$unemp[has_bias])) # 8%

#### Coding Political Engagement ####

## questions of interest (printed for inspection)
lucid$attention
lucid$news
lucid$know1 # correct = 1
lucid$know2 # correct = 3
lucid$know3 # correct = 3
lucid$know4 # correct = 2
lucid$know5 # correct = 2

## flips, recodes, normalizing
lucid$attention <- (lucid$attention - 1) / 4

lucid$news <- lucid$news / 7

# 1 = correct answer, 0 = any other answer, NA = no answer
correct1 <- as.numeric(lucid$know1 == 1)
correct2 <- as.numeric(lucid$know2 == 3)
correct3 <- as.numeric(lucid$know3 == 3)
correct4 <- as.numeric(lucid$know4 == 2)
correct5 <- as.numeric(lucid$know5 == 2)

# share of the five items answered correctly; a single unanswered item makes
# the whole score NA
total_corr <- correct1 + correct2 + correct3 + correct4 + correct5
lucid$know <- total_corr / 5

## average of the available components (NaN when none is available)
engage_items <- cbind(lucid$attention, lucid$news, lucid$know)
engage_means <- apply(engage_items, 1, function(resp) mean(resp[!is.na(resp)]))

lucid$engage <- engage_means

#### Export Final Dataset ####
# writes the coded data frame, including its row names, to the working directory
write.csv(x = lucid, file = "neg_bias_analysis_file_20.csv")


